delta.meqtl.cpg.anno.df$Relation_to_Island <- factor(delta.meqtl.cpg.anno.df$Relation_to_Island, levels = c("N_Shelf",
"N_Shore", "Island", "S_Shore", "S_Shelf", "OpenSea"))
delta.meqtl.cpg.anno.df[["Model"]] <- as.factor("Delta")
veh.meqtl.cpg.anno.df$Relation_to_Island <- factor(veh.meqtl.cpg.anno.df$Relation_to_Island, levels = c("N_Shelf",
"N_Shore", "Island", "S_Shore", "S_Shelf", "OpenSea"))
veh.meqtl.cpg.anno.df[["Model"]] <- as.factor("Baseline")
dex.meqtl.cpg.anno.df$Relation_to_Island <- factor(dex.meqtl.cpg.anno.df$Relation_to_Island, levels = c("N_Shelf",
"N_Shore", "Island", "S_Shore", "S_Shelf", "OpenSea"))
dex.meqtl.cpg.anno.df[["Model"]] <- as.factor("Dex")
meqtl.cpg.anno.df <- rbind(delta.meqtl.cpg.anno.df, veh.meqtl.cpg.anno.df, dex.meqtl.cpg.anno.df)
head(meqtl.cpg.anno.df, 100)
head(cpg.closest.genes.dist.df, 100)
library(GenomicFeatures)
chr.length.tbl <- getChromInfoFromUCSC("hg19")[, c(1, 2)] %>%
setDT()
chr.length.tbl <- chr.length.tbl[chrom %in% paste0("chr", 1:22)] #, 'X', 'Y'))]
colnames(chr.length.tbl) <- c("chr", "chr_size")
chr.length.tbl$chr <- as.factor(chr.length.tbl$chr)
chr.order <- paste("chr", c(1:22), sep = "")
meth <- delta.meqtl.cpg.anno.df
meth$chr <- factor(meth$chr, levels = chr.order)
meth <- left_join(meth, chr.length.tbl)
library(janitor)
chr.cpgs.cnt <- tabyl(meth$chr, sort = T)
colnames(chr.cpgs.cnt) <- c("chr", "cnt_cpg", "freq_cpg")
chr.cpgs.cnt <- left_join(chr.cpgs.cnt, chr.length.tbl, by = "chr")
chr.cpgs.cnt$chr <- factor(chr.cpgs.cnt$chr, levels = chr.order)
chr.cpgs.cnt[["freq_chr"]] <- chr.cpgs.cnt$chr_size/sum(chr.cpgs.cnt$chr_size)
chr.cpgs.cnt[["cpg_chr_cnt"]] <- chr.cpgs.cnt$cnt_cpg/chr.cpgs.cnt$freq_chr
chr.cpgs.cnt[["cpg_chr_freq"]] <- chr.cpgs.cnt$cpg_chr_cnt/sum(chr.cpgs.cnt$cpg_chr_cnt)
# Relative freq = (Number of CpGs on the chromosome (i) * (length of chromosomes{1:22} / length of
# the chromosome(i)) ) in %
ggplot(chr.cpgs.cnt, aes(x = chr, y = cpg_chr_freq, fill = chr)) + geom_bar(stat = "identity", position = position_dodge()) +
geom_text(aes(label = scales::percent(cpg_chr_freq, accuracy = 0.1), y = cpg_chr_freq), stat = "identity",
vjust = -0.5, size = 3) + scale_y_continuous(labels = scales::percent) + labs(title = "Delta meQTLs CpGs distribution across the chromosomes wieghted by the chromosomes' length",
x = "Chromosome", y = "Relative frequency") + theme(panel.background = element_blank(), plot.title = element_text(size = 8),
axis.title = element_text(size = 8), axis.text.x = element_text(angle = 20, hjust = 0.5), legend.position = "none")
CpGs distribution across chromosomes
cbPalette <- c( "#CC6677", "#DDCC77", "#117733", "#AA4499", "#44AA99", "#888888")
plt.axis.size <- 8
ggplot(meqtl.cpg.anno.df, aes(x = Model, fill = Relation_to_Island)) +
geom_bar( position = "fill") +
geom_text(aes(by = Model, # scales::percent(..count../tapply(..count.., ..x.., sum), accuracy = 0.1),
y = (..count..)/sum(..count..)),
stat = "prop",
position = position_fill(vjust = .5), size = 3) +
scale_y_continuous(labels = scales::percent) +
labs(x = "",
y = "Percentage of CpGs",
title = "") +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.background = element_blank(),
plot.title = element_text(size = 8),
axis.title = element_text(size = 8),
axis.text.x = element_text(angle = 0, hjust = 0.5)) +
scale_fill_manual("Feature", values = cbPalette)
Distribution of locations of the CpGs in different genomic regions based on Illumina EPIC annotation
cbPalette <- c( "#CC6677", "#DDCC77", "#117733", "#AA4499", "#44AA99", "#888888")
ggplot(meqtl.cpg.anno.df[Model %in% c("Delta", "Baseline")], aes(x = Model, fill = Relation_to_Island)) +
geom_bar( position = "fill") +
geom_text(aes(by = Model, # scales::percent(..count../tapply(..count.., ..x.., sum), accuracy = 0.1),
y = (..count..)/sum(..count..)),
stat = "prop",
position = position_fill(vjust = .5), size = 3) +
scale_y_continuous(labels = scales::percent) +
labs(x = "",
y = "Percentage of CpGs",
title = "") +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.background = element_blank(),
plot.title = element_text(size = 8),
axis.title = element_text(size = 8),
axis.text.x = element_text(angle = 0, hjust = 0.5)) +
scale_fill_manual("Feature", values = cbPalette)
Distribution of locations of the CpGs in different genomic regions based on Illumina EPIC annotation
cbPalette <- c( "#CC6677", "#DDCC77", "#117733", "#AA4499", "#44AA99", "#888888")
ggplot(meqtl.cpg.anno.df[Model %in% c("Delta")], aes(x = Model, fill = Relation_to_Island)) +
geom_bar( stat = "count") +
geom_text(aes(by = Model, # scales::percent(..count../tapply(..count.., ..x.., sum), accuracy = 0.1),
label = scales::percent((..count..)/sum(..count..), accuracy = 0.1)),
stat = "prop",
position = position_stack(vjust = 0.6, ), size = plt.axis.size / 2, color = "white") +
coord_polar(theta = 'y', start = 0) +
labs(x = " ",
y = "Percentage of CpGs",
title = "") +
theme(legend.position = "right",
legend.title = element_blank(),
legend.text = element_text(size = plt.axis.size),
panel.grid.major = element_blank(),
panel.background = element_blank(),
plot.title = element_text(size = 8),
axis.title = element_text(size = plt.axis.size),
axis.text.x = element_blank()) +
scale_fill_manual("Feature", values = cbPalette)
Distribution of locations of delta meQTL CpGs in different genomic regions based on Illumina EPIC annotation
cbPalette <- c( "#CC6677", "#DDCC77", "#117733", "#AA4499", "#44AA99", "#888888")
ggplot(meqtl.unique.cpg.anno.df, aes(x = Model, fill = Relation_to_Island)) +
geom_bar( position = "fill") +
geom_text(aes(by = Model, # scales::percent(..count../tapply(..count.., ..x.., sum), accuracy = 0.1),
y = (..count..)/sum(..count..)),
stat = "prop",
position = position_fill(vjust = .5), size = 3) +
scale_y_continuous(labels = scales::percent) +
labs(x = "",
y = "Percentage of CpGs",
title = "Unique meCpGs") +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.background = element_blank(),
plot.title = element_text(size = 10),
axis.title = element_text(size = 8),
axis.text.x = element_text(angle = 0, hjust = 0.5)) +
scale_fill_manual("Feature", values = cbPalette)
Distribution of locations of the meCpGs in different genomic regions based on Illumina EPIC annotation
Annotation with Chipseeker
cbPalette <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933",
"#882255", "#661100", "#6699CC", "#888888")
ggplot(meqtl.snp.anno.stat.df, aes(x = Model, y = Frequency, fill = Feature)) + geom_bar(position = "fill",
stat = "identity") + geom_text(data = subset(meqtl.snp.anno.stat.df, Frequency > 2), aes(by = Model,
label = Frequency), position = position_fill(vjust = 0.5), size = 3) + labs(x = "", y = "Percentage of CpGs",
title = "meCpGs annotation from UCSC for the hg19 genome build using TxDb.Hsapiens.UCSC.hg19.knownGene and ChIPseeker Bioconductor R packages") +
theme(legend.position = "right", panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text.x = element_text(angle = 0,
hjust = 0.5)) + scale_fill_manual("Feature", values = cbPalette)
Distribution of gene-centric locations of the meCpGs based on Illumina EPIC annotation
cbPalette <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933",
"#882255", "#661100", "#6699CC", "#888888")
ggplot(meqtl.snp.anno.stat.df[meqtl.snp.anno.stat.df$Model == "Delta", ], aes(x = Model, y = Frequency,
fill = Feature)) + geom_col() + geom_text(data = subset(meqtl.snp.anno.stat.df[meqtl.snp.anno.stat.df$Model ==
"Delta", ], Frequency > 2), aes(by = Model, label = paste0(Frequency, "%")), position = position_stack(vjust = 0.8,
), size = plt.axis.size/2, color = "white") + coord_polar(theta = "y", start = 0) + labs(x = " ",
y = "Percentage of CpGs", title = "") + theme(legend.position = "right", legend.title = element_blank(),
legend.text = element_text(size = plt.axis.size), panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text.x = element_blank()) +
scale_fill_manual("Feature", values = cbPalette)
Distribution of gene-centric locations of the delta meCpGs based on Illumina EPIC annotation
gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
"Non-significant")
ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = or.odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 8), axis.title = element_text(size = plt.axis.size), axis.text.x = element_text(angle = 0,
hjust = 0.5, size = plt.axis.size), axis.text.y = element_text(size = plt.axis.size), legend.text = element_text(size = plt.axis.size)) +
scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.
gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
"Non-significant")
ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = or.odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text.x = element_text(angle = 0,
hjust = 0.5)) + scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.
gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
"Non-significant")
ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = or.odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text.x = element_text(angle = 0,
hjust = 0.5)) + scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.
gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
"Non-significant")
ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 8), axis.title = element_text(size = plt.axis.size), axis.text.x = element_text(angle = 20,
hjust = 0.5, size = plt.axis.size), axis.text.y = element_text(size = plt.axis.size), legend.text = element_text(size = plt.axis.size)) +
scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.
gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
"Non-significant")
ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 8), axis.title = element_text(size = plt.axis.size), axis.text.x = element_text(angle = 20,
hjust = 0.5, size = plt.axis.size), axis.text.y = element_text(size = plt.axis.size), legend.text = element_text(size = plt.axis.size)) +
scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.
gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
"Non-significant")
ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 8), axis.title = element_text(size = plt.axis.size), axis.text.x = element_text(angle = 20,
hjust = 0.5, size = plt.axis.size), axis.text.y = element_text(size = plt.axis.size), legend.text = element_text(size = plt.axis.size)) +
scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.
Histone mark enrichment for the GR-induced meCpGs using 15 chromatin states from 18 blood and T- & B-cells types. The first figure denotes the fold enrichment / depletion as compared to baseline meCpGs, the second figure denotes significance based on the empirical P≤0.05 with 1,000 permutations.
enrich.plts$or
The figure denotes the fold enrichment / depletion as compared to baseline meCpGs
enrich.plts$pval
The figure denotes significance based on the empirical P≤0.05 with 1,000 permutations
Histone mark enrichment for the GR-induced meCpGs using condensed annotation of 20 chromatin states from 18 blood and T- & B-cells types. The first figure denotes the fold enrichment / depletion as compared to baseline meCpGs, the second figure denotes significance based on the empirical P≤0.05 with 1,000 permutations.
enrich.plts$or
The figure denotes the fold enrichment / depletion as compared to baseline meCpGs
enrich.plts$pval
The figure denotes significance based on the empirical P≤0.05 with 1,000 permutations